See tutorial on Item Response Theory (IRT).
library(tidyverse)
library(ggside)
library(easystats)
library(patchwork)
library(mirt)
df <- read.csv("https://raw.githubusercontent.com/RealityBending/IllusionGameReliability/main/data/preprocessed_questionnaires.csv") |>
select(starts_with("Item_PHQ4")) |>
datawizard::remove_empty_rows()
names(df) <- str_remove_all(names(df), "Item_PHQ4_")
# s <- 'Anxiety = 1,2
# Depression = 3,4
# COV = Anxiety*Depression'
# mod1 <- mirt(df, model = mirt.model(s), itemtype = 'graded', SE = TRUE)
data <- select(df, contains("Depression"))
mod1 <- mirt(data, model = 1, itemtype = 'graded', SE = TRUE)
##
Iteration: 1, Log-Lik: -23.728, Max-Change: 0.54927
Iteration: 2, Log-Lik: -22.543, Max-Change: 0.88411
Iteration: 3, Log-Lik: -21.267, Max-Change: 1.26194
Iteration: 4, Log-Lik: -19.186, Max-Change: 0.87383
Iteration: 5, Log-Lik: -19.099, Max-Change: 0.66498
Iteration: 6, Log-Lik: -19.057, Max-Change: 0.45699
Iteration: 7, Log-Lik: -19.015, Max-Change: 0.13119
Iteration: 8, Log-Lik: -19.009, Max-Change: 0.10175
Iteration: 9, Log-Lik: -19.004, Max-Change: 0.07540
Iteration: 10, Log-Lik: -18.997, Max-Change: 0.03874
Iteration: 11, Log-Lik: -18.996, Max-Change: 0.03387
Iteration: 12, Log-Lik: -18.995, Max-Change: 0.02976
Iteration: 13, Log-Lik: -18.992, Max-Change: 0.00553
Iteration: 14, Log-Lik: -18.992, Max-Change: 0.00469
Iteration: 15, Log-Lik: -18.992, Max-Change: 0.00424
Iteration: 16, Log-Lik: -18.992, Max-Change: 0.00200
Iteration: 17, Log-Lik: -18.992, Max-Change: 0.00184
Iteration: 18, Log-Lik: -18.992, Max-Change: 0.00174
Iteration: 19, Log-Lik: -18.992, Max-Change: 0.00121
Iteration: 20, Log-Lik: -18.992, Max-Change: 0.00115
Iteration: 21, Log-Lik: -18.992, Max-Change: 0.00109
Iteration: 22, Log-Lik: -18.992, Max-Change: 0.00077
Iteration: 23, Log-Lik: -18.992, Max-Change: 0.00073
Iteration: 24, Log-Lik: -18.992, Max-Change: 0.00069
Iteration: 25, Log-Lik: -18.992, Max-Change: 0.00018
Iteration: 26, Log-Lik: -18.992, Max-Change: 0.00019
Iteration: 27, Log-Lik: -18.992, Max-Change: 0.00018
Iteration: 28, Log-Lik: -18.992, Max-Change: 0.00014
Iteration: 29, Log-Lik: -18.992, Max-Change: 0.00013
Iteration: 30, Log-Lik: -18.992, Max-Change: 0.00013
Iteration: 31, Log-Lik: -18.992, Max-Change: 0.00012
Iteration: 32, Log-Lik: -18.992, Max-Change: 0.00012
Iteration: 33, Log-Lik: -18.992, Max-Change: 0.00011
Iteration: 34, Log-Lik: -18.992, Max-Change: 0.00010
Iteration: 35, Log-Lik: -18.992, Max-Change: 0.00010
##
## Calculating information matrix...
# M2(mod1, type = "C2", calcNULL = FALSE)
itemfit(mod1)
## item S_X2 df.S_X2 RMSEA.S_X2 p.S_X2
## 1 Depression_3 28.7 1 1.86 0
## 2 Depression_4 NaN 0 NaN NaN
# IRT parameters
IRT_parms <- coef(mod1, IRTpars = TRUE, simplify = TRUE)
IRT_parms$items
## a b1 b2 b3 b4
## Depression_3 8.61 -0.477 0.350 1.334 NA
## Depression_4 7.95 -0.479 0.351 0.786 1.34
# Factor
summary(mod1)
## F1 h2
## Depression_3 0.981 0.962
## Depression_4 0.978 0.956
##
## SS loadings: 1.92
## Proportion Var: 0.959
##
## Factor correlations:
##
## F1
## F1 1
# Plots
plot(mod1, type = 'trace', theta_lim = c(-3, 3))
plot(mod1, type = 'infotrace', theta_lim = c(-3, 3))
plot(mod1, type = 'score', theta_lim = c(-3, 3))
plot(mod1, type = 'infoSE', theta_lim = c(-3, 3))
plot(mod1, type = 'rxx', theta_lim = c(-3, 3))
Typically, an optimally informative item will have a large location and broad category coverage (as indicated by location parameters) over theta.
crc <- function(mod, data, x = c(-1.25, 2.25)) {
Theta <- matrix(seq(-3, 3, by = .01))
rez <- data.frame()
for(i in 1:2) {
dat <- as.data.frame(probtrace(extract.item(mod1, i), Theta))
dat$Theta <- Theta[,1]
dat$Information <- normalize(iteminfo(extract.item(mod1, i), Theta))
dat <- pivot_longer(dat, -one_of(c("Theta", "Information")), names_to = "Answer", values_to = "Probability")
dat$Item <- names(data)[i]
rez <- rbind(rez, dat)
}
rez <- rez |>
mutate(
Answer = case_when(
Answer == "P.1" ~ "Not at all",
Answer == "P.2" ~ "Once or twice",
Answer == "P.3" ~ "Several days",
Answer == "P.4" ~ "More than half the days",
TRUE ~ "Nearly every day"
)) |>
mutate(
Answer = fct_relevel(Answer, c("Not at all", "Once or twice", "Several days", "More than half the days", "Nearly every day")))
p <- rez |>
ggplot(aes(x = Theta, y = Probability)) +
geom_line(aes(y = Information), linetype = "dotted", color = "grey") +
geom_line(aes(color = Answer)) +
geom_vline(xintercept = x, linetype = "dashed") +
facet_grid(~Item) +
theme_modern()
list(p = p, rez = rez)
}
out <- crc(mod1, data, x = c(-1.25, 2.25))
out$p + labs(x = expression(Depression~(theta)))
item4 <- out$rez |>
filter(Item == "Depression_4") |>
filter(Theta >= -1.5 & Theta <= 2.25) |>
mutate(Theta = normalize(Theta))
scores <- item4 |>
group_by(Answer) |>
filter(Probability == max(Probability)) |>
ungroup()
item4 |>
ggplot(aes(x = Theta, y = Probability, color = Answer)) +
geom_line() +
theme_modern()
data <- select(df, contains("Anxiety"))
m_anxiety <- mirt(data, model = 1, itemtype = 'graded', SE = TRUE)
##
Iteration: 1, Log-Lik: -19.484, Max-Change: 0.67923
Iteration: 2, Log-Lik: -18.134, Max-Change: 1.03571
Iteration: 3, Log-Lik: -16.679, Max-Change: 1.51219
Iteration: 4, Log-Lik: -14.704, Max-Change: 1.93439
Iteration: 5, Log-Lik: -14.462, Max-Change: 2.31611
Iteration: 6, Log-Lik: -14.307, Max-Change: 2.43061
Iteration: 7, Log-Lik: -14.021, Max-Change: 2.30202
Iteration: 8, Log-Lik: -14.006, Max-Change: 2.65289
Iteration: 9, Log-Lik: -13.987, Max-Change: 2.75531
Iteration: 10, Log-Lik: -13.821, Max-Change: 1.50400
Iteration: 11, Log-Lik: -13.805, Max-Change: 0.81955
Iteration: 12, Log-Lik: -13.801, Max-Change: 0.90549
Iteration: 13, Log-Lik: -13.785, Max-Change: 1.70550
Iteration: 14, Log-Lik: -13.779, Max-Change: 0.51799
Iteration: 15, Log-Lik: -13.778, Max-Change: 0.64409
Iteration: 16, Log-Lik: -13.770, Max-Change: 0.59843
Iteration: 17, Log-Lik: -13.769, Max-Change: 0.80317
Iteration: 18, Log-Lik: -13.767, Max-Change: 0.33958
Iteration: 19, Log-Lik: -13.767, Max-Change: 1.82883
Iteration: 20, Log-Lik: -13.765, Max-Change: 0.36195
Iteration: 21, Log-Lik: -13.765, Max-Change: 0.46691
Iteration: 22, Log-Lik: -13.763, Max-Change: 0.48612
Iteration: 23, Log-Lik: -13.762, Max-Change: 0.60732
Iteration: 24, Log-Lik: -13.762, Max-Change: 0.30250
Iteration: 25, Log-Lik: -13.762, Max-Change: 0.66072
Iteration: 26, Log-Lik: -13.761, Max-Change: 0.29525
Iteration: 27, Log-Lik: -13.761, Max-Change: 0.35435
Iteration: 28, Log-Lik: -13.760, Max-Change: 0.55548
Iteration: 29, Log-Lik: -13.759, Max-Change: 0.46850
Iteration: 30, Log-Lik: -13.759, Max-Change: 0.28578
Iteration: 31, Log-Lik: -13.758, Max-Change: 0.26341
Iteration: 32, Log-Lik: -13.758, Max-Change: 0.25786
Iteration: 33, Log-Lik: -13.758, Max-Change: 0.30994
Iteration: 34, Log-Lik: -13.757, Max-Change: 0.53972
Iteration: 35, Log-Lik: -13.757, Max-Change: 0.48032
Iteration: 36, Log-Lik: -13.757, Max-Change: 0.30180
Iteration: 37, Log-Lik: -13.756, Max-Change: 0.32014
Iteration: 38, Log-Lik: -13.756, Max-Change: 0.41981
Iteration: 39, Log-Lik: -13.756, Max-Change: 0.32593
Iteration: 40, Log-Lik: -13.756, Max-Change: 0.37159
Iteration: 41, Log-Lik: -13.755, Max-Change: 0.40391
Iteration: 42, Log-Lik: -13.755, Max-Change: 0.32229
Iteration: 43, Log-Lik: -13.755, Max-Change: 0.25980
Iteration: 44, Log-Lik: -13.755, Max-Change: 0.25479
Iteration: 45, Log-Lik: -13.754, Max-Change: 0.22322
Iteration: 46, Log-Lik: -13.754, Max-Change: 0.24019
Iteration: 47, Log-Lik: -13.754, Max-Change: 0.39856
Iteration: 48, Log-Lik: -13.754, Max-Change: 0.76429
Iteration: 49, Log-Lik: -13.753, Max-Change: 1.35886
Iteration: 50, Log-Lik: -13.753, Max-Change: 0.21730
Iteration: 51, Log-Lik: -13.753, Max-Change: 0.21397
Iteration: 52, Log-Lik: -13.753, Max-Change: 0.36451
Iteration: 53, Log-Lik: -13.753, Max-Change: 1.10765
Iteration: 54, Log-Lik: -13.752, Max-Change: 0.80621
Iteration: 55, Log-Lik: -13.752, Max-Change: 0.23646
Iteration: 56, Log-Lik: -13.752, Max-Change: 0.12858
Iteration: 57, Log-Lik: -13.752, Max-Change: 0.12158
Iteration: 58, Log-Lik: -13.752, Max-Change: 0.09857
Iteration: 59, Log-Lik: -13.752, Max-Change: 0.03461
Iteration: 60, Log-Lik: -13.752, Max-Change: 0.03430
Iteration: 61, Log-Lik: -13.752, Max-Change: 0.16160
Iteration: 62, Log-Lik: -13.752, Max-Change: 0.18197
Iteration: 63, Log-Lik: -13.752, Max-Change: 0.12825
Iteration: 64, Log-Lik: -13.752, Max-Change: 0.10943
Iteration: 65, Log-Lik: -13.752, Max-Change: 0.03532
Iteration: 66, Log-Lik: -13.752, Max-Change: 0.03592
Iteration: 67, Log-Lik: -13.752, Max-Change: 0.09662
Iteration: 68, Log-Lik: -13.752, Max-Change: 0.09634
Iteration: 69, Log-Lik: -13.752, Max-Change: 0.09158
Iteration: 70, Log-Lik: -13.752, Max-Change: 0.04876
Iteration: 71, Log-Lik: -13.752, Max-Change: 0.09551
Iteration: 72, Log-Lik: -13.752, Max-Change: 0.15216
Iteration: 73, Log-Lik: -13.751, Max-Change: 0.11593
Iteration: 74, Log-Lik: -13.751, Max-Change: 0.12471
Iteration: 75, Log-Lik: -13.751, Max-Change: 0.13171
Iteration: 76, Log-Lik: -13.751, Max-Change: 0.18460
Iteration: 77, Log-Lik: -13.751, Max-Change: 0.00088
Iteration: 78, Log-Lik: -13.751, Max-Change: 0.00082
Iteration: 79, Log-Lik: -13.751, Max-Change: 0.20522
Iteration: 80, Log-Lik: -13.751, Max-Change: 0.00089
Iteration: 81, Log-Lik: -13.751, Max-Change: 0.00083
Iteration: 82, Log-Lik: -13.751, Max-Change: 0.20668
Iteration: 83, Log-Lik: -13.751, Max-Change: 0.00078
Iteration: 84, Log-Lik: -13.751, Max-Change: 0.12739
Iteration: 85, Log-Lik: -13.751, Max-Change: 0.21932
Iteration: 86, Log-Lik: -13.751, Max-Change: 0.00078
Iteration: 87, Log-Lik: -13.751, Max-Change: 0.00072
Iteration: 88, Log-Lik: -13.751, Max-Change: 0.12902
Iteration: 89, Log-Lik: -13.751, Max-Change: 0.00006
##
## Calculating information matrix...
# M2(m_anxiety, type = "C2", calcNULL = FALSE)
itemfit(m_anxiety)
## item S_X2 df.S_X2 RMSEA.S_X2 p.S_X2
## 1 Anxiety_1 14.7 1 1.31 0
## 2 Anxiety_2 NaN 0 NaN NaN
# IRT parameters
IRT_parms <- coef(m_anxiety, IRTpars = TRUE, simplify = TRUE)
IRT_parms$items
## a b1 b2 b3
## Anxiety_1 75.3 -0.899 0.781 NA
## Anxiety_2 72.4 -0.899 0.103 1.2
# Factor
summary(m_anxiety)
## F1 h2
## Anxiety_1 1 0.999
## Anxiety_2 1 0.999
##
## SS loadings: 2
## Proportion Var: 0.999
##
## Factor correlations:
##
## F1
## F1 1
# Plots
plot(m_anxiety, type = 'score', theta_lim = c(-3, 3))
plot(m_anxiety, type = 'infoSE', theta_lim = c(-3, 3))
plot(m_anxiety, type = 'rxx', theta_lim = c(-3, 3))
out <- crc(m_anxiety, data, x = c(-1.25, 2.25))
out$p + labs(x = expression(Anxiety~(theta)))